## Keep only the record id and the country-names column, discarding records
## whose country field is an empty string. dplyr::filter() also drops rows
## where the comparison evaluates to NA, so missing values are removed too.
## To experiment on a sample, start the pipeline from df_dimensions_sample.
df_paises <- df_dimensions %>%
  dplyr::select(id, research_org_country_names) %>%
  dplyr::filter(research_org_country_names != "")
paises <- df_paises$research_org_country_names

## Split each record on ";" so that a record listing several countries
## becomes a character vector with one element per country.
## NOTE(review): elements are not trimmed; if the source separates countries
## with "; " the names keep a leading space -- confirm against the data.
paises_split <- paises %>%
  stringr::str_split(";")

## Distinct country names across all records. No per-record cap is applied,
## so a name that only shows up late in a long country list is still found.
unique_values <- unique(unlist(paises_split, use.names = FALSE))
altmetrics.score - 248763 de 443511 linhas em branco (NA, NULL)
clinical_trial_ids - 430207 de 443511 linhas em branco (NA, NULL ou "")
times_cited - 281 de 443511 linhas em branco (NA, NULL)
| Name | df_dimensions$altmetrics…. |
| Number of rows | 443511 |
| Number of columns | 1 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| data | 248763 | 0.44 | 62.57 | 449.78 | 1 | 2 | 5 | 17 | 35478 | ▇▁▁▁▁ |
## Warning: Ignoring 248763 observations
| Name | df_dimensions$clinical_tr… |
| Number of rows | 443511 |
| Number of columns | 1 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| data | 0 | 1 | 0 | 4907 | 430207 | 9763 | 0 |
| Name | df_dimensions$metrics.tim… |
| Number of rows | 443511 |
| Number of columns | 1 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| data | 281 | 1 | 5.06 | 70.17 | 0 | 0 | 0 | 1 | 19868 | ▇▁▁▁▁ |
## Warning: Ignoring 281 observations
Ano - 0 de 443511 linhas em branco (NA, NULL)
País - 136992 de 443511 linhas em branco (NA, NULL ou ""). Nesse caso, verifica se existe na linha um ou mais países, ou se a linha está vazia.
Tipo de publicação - 0 de 443511 linhas em branco (NA, NULL). Podem ser do tipo: article, book, chapter, monograph, preprint ou proceeding
| Name | df_dimensions$type |
| Number of rows | 443511 |
| Number of columns | 1 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| data | 0 | 1 | 4 | 10 | 0 | 6 | 0 |
## Carregando pacotes exigidos: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## Carregando pacotes exigidos: sp
## rgdal: version: 1.5-23, (SVN revision 1121)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
## Path to GDAL shared files: /usr/share/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: /usr/share/proj
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
Funders - 381381 de 443511 linhas em branco (NA, NULL)
| Name | df_dimensions$funder_orgs |
| Number of rows | 443511 |
| Number of columns | 1 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| data | 0 | 1 | 0 | 377 | 381381 | 17638 | 0 |
| Name | df_dimensions$type |
| Number of rows | 443511 |
| Number of columns | 1 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| data | 0 | 1 | 4 | 10 | 0 | 6 | 0 |
## skim_type skim_variable n_missing complete_rate character.min
## 1: character funder_orgs 0 1.0000000 0
## 2: character raw_affiliations 0 1.0000000 3
## 3: character research_org_cities 0 1.0000000 0
## 4: character research_org_city_names 0 1.0000000 0
## 5: character research_org_countries 17 0.9999617 0
## 6: character research_org_country_names 0 1.0000000 0
## 7: character research_org_state_codes 0 1.0000000 0
## 8: character research_org_state_names 0 1.0000000 0
## 9: character research_orgs 0 1.0000000 0
## character.max character.empty character.n_unique character.whitespace
## 1: 377 381381 17638 0
## 2: 183028 0 296018 0
## 3: 2234 139600 104138 0
## 4: 2507 139526 118867 0
## 5: 203 136992 24220 0
## 6: 580 136992 24165 0
## 7: 236 332512 14803 0
## 8: 377 332512 14991 0
## 9: 3523 166739 139136 0
## [1] "Temos 0 linhas em branco, na coluna de afiliações dos autores"
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html